* possibility, which may occur if the block was passed to us by control
* tools or through VCPUOP_initialise, by silently clearing the block.
*/
- asm volatile (
- /* See above for why the operands/constraints are this way. */
- "1: " REX64_PREFIX "fxrstor (%2)\n"
- ".section .fixup,\"ax\" \n"
- "2: push %%"__OP"ax \n"
- " push %%"__OP"cx \n"
- " push %%"__OP"di \n"
- " lea %0,%%"__OP"di \n"
- " mov %1,%%ecx \n"
- " xor %%eax,%%eax \n"
- " rep ; stosl \n"
- " pop %%"__OP"di \n"
- " pop %%"__OP"cx \n"
- " pop %%"__OP"ax \n"
- " jmp 1b \n"
- ".previous \n"
- _ASM_EXTABLE(1b, 2b)
- :
- : "m" (*fpu_ctxt),
- "i" (sizeof(v->arch.xsave_area->fpu_sse)/4)
- ,"cdaSDb" (fpu_ctxt)
- );
+ /*
+  * Dispatch on the operand size fpu_fxsave() recorded in the image's
+  * last software-available byte (8 for a 64-bit fxsaveq image, 4/2 for
+  * a 32-/16-bit one); default to 8 for images we did not write
+  * ourselves (e.g. loaded via VCPUOP_initialise).
+  * NOTE(review): this indexes fpu_ctxt directly, which is only correct
+  * if the (not visible in this hunk) declaration of fpu_ctxt is still
+  * a plain char pointer; fpu_fxsave() below was retyped to point at
+  * the fpu_sse struct and hence uses ->x[FPU_WORD_SIZE_OFFSET] --
+  * confirm the two hunks agree.
+  */
+ switch ( __builtin_expect(fpu_ctxt[FPU_WORD_SIZE_OFFSET], 8) )
+ {
+ default:
+ asm volatile (
+ /* See below for why the operands/constraints are this way. */
+ "1: " REX64_PREFIX "fxrstor (%2)\n"
+ ".section .fixup,\"ax\" \n"
+ "2: push %%"__OP"ax \n"
+ " push %%"__OP"cx \n"
+ " push %%"__OP"di \n"
+ " mov %2,%%"__OP"di \n"
+ " mov %1,%%ecx \n"
+ " xor %%eax,%%eax \n"
+ " rep ; stosl \n"
+ " pop %%"__OP"di \n"
+ " pop %%"__OP"cx \n"
+ " pop %%"__OP"ax \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ :
+ : "m" (*fpu_ctxt),
+ "i" (sizeof(v->arch.xsave_area->fpu_sse) / 4),
+ "cdaSDb" (fpu_ctxt) );
+ break;
+ /* Image recorded with 32- or 16-bit operand size: plain fxrstor. */
+ case 4: case 2:
+ asm volatile (
+ "1: fxrstor %0 \n"
+ ".section .fixup,\"ax\"\n"
+ "2: push %%"__OP"ax \n"
+ " push %%"__OP"cx \n"
+ " push %%"__OP"di \n"
+ " lea %0,%%"__OP"di \n"
+ " mov %1,%%ecx \n"
+ " xor %%eax,%%eax \n"
+ " rep ; stosl \n"
+ " pop %%"__OP"di \n"
+ " pop %%"__OP"cx \n"
+ " pop %%"__OP"ax \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ :
+ : "m" (*fpu_ctxt),
+ "i" (sizeof(v->arch.xsave_area->fpu_sse) / 4) );
+ break;
+ }
}
/* Restore x87 extended state */
/* Save x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxsave(struct vcpu *v)
{
- char *fpu_ctxt = v->arch.fpu_ctxt;
+ typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt;
+ /*
+  * Operand size to record in the image: 8 when the CPU still stores
+  * FCS/FDS (so a 64-bit fxsaveq is fully descriptive), 0 when the CPU
+  * deprecates the selectors (see cpu_has_fpu_sel) and they may need to
+  * be recovered below.
+  */
+ int word_size = cpu_has_fpu_sel ? 8 : 0;
- /*
- * The only way to force fxsaveq on a wide range of gas versions. On
- * older versions the rex64 prefix works only if we force an
- * addressing mode that doesn't require extended registers.
- */
- asm volatile (
- REX64_PREFIX "fxsave (%1)"
- : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+ if ( !is_pv_32bit_vcpu(v) )
+ {
+ /*
+ * The only way to force fxsaveq on a wide range of gas versions.
+ * On older versions the rex64 prefix works only if we force an
+ * addressing mode that doesn't require extended registers.
+ */
+ asm volatile ( REX64_PREFIX "fxsave (%1)"
+ : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+
+ /*
+ * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+ * is pending.
+ */
+ if ( !(fpu_ctxt->fsw & 0x0080) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+ /* -1: FIP/FDP are stale -> leave the marker byte untouched. */
+ word_size = -1;
+
+ /*
+ * The 64-bit image format has no room for FCS/FDS.  When both
+ * FIP and FDP fit in 32 bits, re-read the selectors via fnstenv
+ * and downgrade the image to the 32-bit format.
+ */
+ if ( word_size > 0 &&
+ !((fpu_ctxt->fip.addr | fpu_ctxt->fdp.addr) >> 32) )
+ {
+ struct ix87_env fpu_env;
+
+ asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
+ fpu_ctxt->fip.sel = fpu_env.fcs;
+ fpu_ctxt->fdp.sel = fpu_env.fds;
+ word_size = 4;
+ }
+ }
+ else
+ {
+ /* 32-bit PV vCPU: a plain fxsave already stores FCS/FDS. */
+ asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) );
+ word_size = 4;
+ }
+
+ if ( word_size >= 0 )
+ fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = word_size;
/* Clear exception flags if FSW.ES is set. */
- if ( unlikely(fpu_ctxt[2] & 0x80) )
+ if ( unlikely(fpu_ctxt->fsw & 0x0080) )
asm volatile ("fnclex");
/*
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/hvm/support.h>
+#include <asm/i387.h>
#include <asm/xstate.h>
#include <asm/asm_defns.h>
struct xsave_struct *ptr = v->arch.xsave_area;
uint32_t hmask = mask >> 32;
uint32_t lmask = mask;
+ /* -1: x87 state not being saved -> never touch the marker byte. */
+ int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
- if ( cpu_has_xsaveopt )
- asm volatile (
- ".byte " REX_PREFIX "0x0f,0xae,0x37"
- :
- : "a" (lmask), "d" (hmask), "D"(ptr)
- : "memory" );
+ if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
+ {
+ /* 64-bit path: explicit REX.W (0x48) byte forces xsaveopt64/xsave64. */
+ if ( cpu_has_xsaveopt )
+ asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
+ : "=m" (*ptr)
+ : "a" (lmask), "d" (hmask), "D" (ptr) );
+ else
+ asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
+ : "=m" (*ptr)
+ : "a" (lmask), "d" (hmask), "D" (ptr) );
+
+ /* Nothing to fix up if no x87 state was actually written, ... */
+ if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) ||
+ /*
+ * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+ * is pending.
+ */
+ (!(ptr->fpu_sse.fsw & 0x0080) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
+ return;
+
+ /*
+ * Recover FCS/FDS via fnstenv (mirrors fpu_fxsave()) when both
+ * FIP and FDP fit in 32 bits, downgrading the recorded format.
+ */
+ if ( word_size > 0 &&
+ !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) )
+ {
+ struct ix87_env fpu_env;
+
+ asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
+ ptr->fpu_sse.fip.sel = fpu_env.fcs;
+ ptr->fpu_sse.fdp.sel = fpu_env.fds;
+ word_size = 4;
+ }
+ }
else
- asm volatile (
- ".byte " REX_PREFIX "0x0f,0xae,0x27"
- :
- : "a" (lmask), "d" (hmask), "D"(ptr)
- : "memory" );
+ {
+ /* 32-bit PV vCPU with x87 state: 32-bit opcode keeps FCS/FDS. */
+ if ( cpu_has_xsaveopt )
+ asm volatile ( ".byte 0x0f,0xae,0x37"
+ : "=m" (*ptr)
+ : "a" (lmask), "d" (hmask), "D" (ptr) );
+ else
+ asm volatile ( ".byte 0x0f,0xae,0x27"
+ : "=m" (*ptr)
+ : "a" (lmask), "d" (hmask), "D" (ptr) );
+ word_size = 4;
+ }
+ if ( word_size >= 0 )
+ ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
}
void xrstor(struct vcpu *v, uint64_t mask)
{
uint32_t hmask = mask >> 32;
uint32_t lmask = mask;
-
struct xsave_struct *ptr = v->arch.xsave_area;
/*
* possibility, which may occur if the block was passed to us by control
* tools or through VCPUOP_initialise, by silently clearing the block.
*/
- asm volatile ( "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
- ".section .fixup,\"ax\"\n"
- "2: mov %5,%%ecx \n"
- " xor %1,%1 \n"
- " rep stosb \n"
- " lea %2,%0 \n"
- " mov %3,%1 \n"
- " jmp 1b \n"
- ".previous \n"
- _ASM_EXTABLE(1b, 2b)
- : "+&D" (ptr), "+&a" (lmask)
- : "m" (*ptr), "g" (lmask), "d" (hmask),
- "m" (xsave_cntxt_size)
- : "ecx" );
+ /*
+  * Dispatch on the operand size xsave() recorded in the image: 8 (or
+  * an image we did not write, hence the default) -> 64-bit xrstor via
+  * an explicit REX.W (0x48) prefix byte; 4/2 -> 32-bit xrstor for
+  * images carrying FCS/FDS.
+  */
+ switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+ {
+ default:
+ asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
+ ".section .fixup,\"ax\" \n"
+ "2: mov %5,%%ecx \n"
+ " xor %1,%1 \n"
+ " rep stosb \n"
+ " lea %2,%0 \n"
+ " mov %3,%1 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ : "+&D" (ptr), "+&a" (lmask)
+ : "m" (*ptr), "g" (lmask), "d" (hmask),
+ "m" (xsave_cntxt_size)
+ : "ecx" );
+ break;
+ case 4: case 2:
+ asm volatile ( "1: .byte 0x0f,0xae,0x2f\n"
+ ".section .fixup,\"ax\" \n"
+ "2: mov %5,%%ecx \n"
+ " xor %1,%1 \n"
+ " rep stosb \n"
+ " lea %2,%0 \n"
+ " mov %3,%1 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ : "+&D" (ptr), "+&a" (lmask)
+ : "m" (*ptr), "g" (lmask), "d" (hmask),
+ "m" (xsave_cntxt_size)
+ : "ecx" );
+ break;
+ }
}
bool_t xsave_enabled(const struct vcpu *v)
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */
#define X86_FEATURE_RTM (7*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_NO_FPU_SEL (7*32+13) /* FPU CS/DS stored as zero */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
#define cpu_has_fsgsbase boot_cpu_has(X86_FEATURE_FSGSBASE)
#define cpu_has_smep boot_cpu_has(X86_FEATURE_SMEP)
+#define cpu_has_fpu_sel (!boot_cpu_has(X86_FEATURE_NO_FPU_SEL))
#define cpu_has_ffxsr ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) \
&& boot_cpu_has(X86_FEATURE_FFXSR))
#include <xen/types.h>
#include <xen/percpu.h>
+/* Byte offset of the stored word size within the FXSAVE area/portion. */
+#define FPU_WORD_SIZE_OFFSET 511
+
+/*
+ * Image written by fnstenv/fnsave -- presumably the 32-bit protected-mode
+ * layout (TODO confirm against the SDM); used to recover FCS/FDS, which
+ * the 64-bit fxsave format does not store.
+ */
+struct ix87_state {
+ struct ix87_env {
+ uint16_t fcw, _res0;
+ uint16_t fsw, _res1;
+ uint16_t ftw, _res2;
+ uint32_t fip;
+ /* Code selector and (low 11 bits) last opcode. */
+ uint16_t fcs;
+ uint16_t fop;
+ uint32_t fdp;
+ uint16_t fds, _res6;
+ } env;
+ /* 80-bit extended-precision stack registers ST0..ST7. */
+ struct ix87_reg {
+ uint64_t mantissa;
+ uint16_t exponent:15;
+ uint16_t sign:1;
+ } __attribute__((__packed__)) r[8];
+};
+
void vcpu_restore_fpu_eager(struct vcpu *v);
void vcpu_restore_fpu_lazy(struct vcpu *v);
void vcpu_save_fpu(struct vcpu *v);
#define XSTATE_NONLAZY (XSTATE_LWP)
#define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY)
-#define REX_PREFIX "0x48, "
-
/* extended state variables */
DECLARE_PER_CPU(uint64_t, xcr0);